#################################################################################################################################################
### Setup 

# Source setup file
setwd('~/Dropbox/Projects/Dissertation/Bill Text and Agenda Control/JOP Replication Files')
source('text_setup.R')
types <- c('hr', 'hjres', 's', 'sjres')

# Load data
predsH <- readRDS(file = 'Data/predVotes_House_all_rr.rds')
predsS <- readRDS(file = 'Data/predVotes_Senate_all_rr.rds')
pvdfH <- readRDS(file = 'Data/predVotes_modData_House_rr.rds')
pvdfH$type <- str_split(pvdfH$id, '_') %>% sapply(., function(x){x[2]}) %>% str_extract(., '[a-z]+')
pvdfH <- filter(pvdfH, type %in% types) %>% dplyr::select(., -type)
pvdfS <- readRDS(file = 'Data/predVotes_modData_Senate_rr.rds')
load(file = paste0('Data/predVotes_ROC.RData'))


#################################################################################################################################################
### Figure A1

### Function to make separation plots
sepPlot <- function(df){
  if(identical(df, predsH)){
    df <- filter(df, ivs == 'full')
  }
  df$real_y <- 0
  df$real_y[df$acc == 1 & df$predY == 1] <- 1
  df$real_y[df$acc == 0 & df$predY == 0] <- 1
  pdta <- dplyr::select(df, c(real_y, preds)) %>%
    filter(., !is.na(real_y) & !is.na(preds))
  pdta$real_y <- factor(pdta$real_y)
  pdta <- pdta[with(pdta, order(preds)), ]
  pdta[, 'index'] <- 1:nrow(pdta)
  p <- ggplot(data = pdta, aes(x = index, y = 1)) +
    geom_bar(aes(fill = real_y), stat = 'identity', position = 'identity', width = 1) +
    scale_fill_manual(values = c('#cccccc', '#525252'), labels = c('Nay', 'Yea'), guide = FALSE) +
    geom_path(aes(y = preds), size = 1.5) +
    labs(y = 'Pr(Passed)', x = 'Votes') + 
    theme(axis.text.x=element_blank(), axis.ticks.x=element_blank(), 
          panel.border = element_blank(), panel.grid = element_blank(), axis.line = element_blank() )
  p
}

### Run separation plots
sepS <- sepPlot(predsS)
sepH <- sepPlot(predsH)


### ROC Curve plot (with AUC)
predsS$real_y <- 0
predsS$real_y[predsS$acc == 0 & predsS$predY == 1] <- 1
predsS$real_y[predsS$acc == 1 & predsS$predY == 0] <- 1
predsS_roc <- filter(predsS, ivs == 'full')
cv_rocS <- roc(response = predsS_roc$real_y, predictor = predsS_roc$preds)
rocDF <- data.frame(sens = cv_rocS$sensitivities, spec = rev(cv_rocS$specificities))
auc <- cv_rocS$auc %>% str_extract(., '[0-9]+\\.[0-9]+') %>% unname() %>% num() %>% round(., 3)
rocS <- ggplot(data = rocDF, aes(x = spec, y = sens)) +
  geom_line(size = 1.5) +
  labs(x = 'Specificity', y = 'Sensitivity') + 
  ggplot2::annotate('text', x = 0.6, y = 0.4, label = paste0('AUC = ', auc), size = 5)
rocDF <- data.frame(sens = cv_roc$sensitivities, spec = rev(cv_roc$specificities))
auc <- cv_roc$auc %>% str_extract(., '[0-9]+\\.[0-9]+') %>% unname() %>% num() %>% round(., 3)
rocH <- ggplot(data = rocDF, aes(x = sens, y = spec)) +
  geom_line(size = 1.5) +
  labs(x = 'Specificity', y = 'Sensitivity') + 
  ggplot2::annotate('text', x = 0.6, y = 0.4, label = paste0('AUC = ', auc), size = 5)

### Plot together and save (WARNING: this is a *large* plot, hard to view)
plots <- plot_grid(rocH, sepH, rocS, sepS, ncol = 2)
pdf(file='Figures/fgA1.pdf', width=10, height=10)
plots
dev.off()



#####################################################################################################################################################
### Run regressions, pull together for table

### Function to run regressions and group together
regTable <- function(modDF, chamber, runParms, majStatus=NULL, interact=FALSE){
  
  # Make sure there is a variable for Congress in the DF for the models
  if(!'cong' %in% names(modDF)){
    modDF$cong <- str_split(modDF$id, '_') %>% 
      sapply(., function(x){x[1]}) %>%
      num()
  }
  
  # Subset runParms to majority status, if applicable
  if(majStatus=='maj') {
    runParms %<>% filter(maj==1)
  } else if(majStatus=='min') {
    runParms %<>% filter(maj==0)
  }
  
  # Loop through runParms
  modList <- c()
  for(ii in 1:nrow(runParms)){
    
    ### Subset modDF by party and Congress, remove extraneous columns
    df <- filter(modDF, cong == runParms[ii,]$congs & member_majority == runParms[ii,]$maj) %>%
      filter(., !is.na(weight_1)) %>%
      dplyr::select(., -c(cong, member_majority, maj_house, maj_sen, demvotesmajorpercent))
    
    ### Set up model variables
    # Pull IV names
    varNames <- names(df)
    if(chamber=='senate'){
      ivs <- varNames[!(varNames %in% c('dv', 'id', 'dual_maj', 'demvotesmajorpercent', 'sponsor'))]
    } else {
      ivs <- varNames[!(varNames %in% c('dv', 'id', 'dual_maj'))]
    }
    ivs <- ivs[!str_detect(ivs, 'weight_')]
    
    # Set up interactions between bill and member variables
    if(interact){
      memVar <- c('dim1', 'dim2', 'Gmargin_pct', 'pvi') %>% .[. %in% ivs]
      billVar <- c('sponsor', 'orig_chamber', 'cosponsr', 'int_majority') %>% .[. %in% ivs]
      ints <- expand.grid(memVar=memVar, billVar=billVar) %>%
      {paste(.[,'memVar'], .[,'billVar'], sep = '*')}
      ivs <- c(ivs, ints)  
    }
    
    # DV name
    dv <- str_subset(varNames, 'dv')
    
    # Formula object
    form <- paste(ivs, collapse = ' + ') %>%
      paste0(dv, ' ~ ', .) %>%
      formula()
    
    
    ### Run model, specify name (majority status and congress), add to modList
    # Run
    mod <- glm(form, data = df, family = binomial)
    
    # Specify model name
    maj <- ifelse(runParms[ii,]$maj==1, 'maj', 'min')
    modName <- paste0(maj, '_', runParms[ii,]$congs)
    
    # Add to model list with correct name
    modList[[ii]] <- mod
    names(modList)[[ii]] <- modName
    
  }
  
  ### Return
  return(modList)
  
}


### Run regressions, save model tables
# Set up parameters
maj <- c(0,1)
congs <- 104:114
toRun <- expand.grid(maj = maj, congs = congs)

# House regressions
majH <- regTable(modDF = pvdfH, chamber='house', runParms = toRun, majStatus='maj', interact=FALSE)
minH <- regTable(modDF = pvdfH, chamber='house', runParms = toRun, majStatus='min', interact=FALSE)

# House model tables
stargazer(majH, column.labels = char(congs), dep.var.caption ='', column.sep.width = '0pt', omit.table.layout = 'n', star.cutoffs = NA,
          dep.var.labels.include = FALSE, colnames=FALSE, digits=2, digits.extra=0,
          covariate.labels=c('DWNOM1', 'DWNOM2', 'Elec. Margin', 'PVI', 'Sponsor', 'Orig. Chamber', 'Cosponsors', 'Cosntant'))
stargazer(minH, column.labels = char(congs), dep.var.caption ='', column.sep.width = '0pt', omit.table.layout = 'n', star.cutoffs = NA,
          dep.var.labels.include = FALSE, colnames=FALSE, digits=2, digits.extra=0,
          covariate.labels=c('DWNOM1', 'DWNOM2', 'Elec. Margin', 'PVI', 'Sponsor', 'Orig. Chamber', 'Cosponsors', 'Cosntant'))

# Senate regressions
majS <- regTable(modDF = pvdfS, chamber='senate', runParms = toRun, majStatus='maj', interact=FALSE)
minS <- regTable(modDF = pvdfS, chamber='senate', runParms = toRun, majStatus='min', interact=FALSE)

# Senate model tables
stargazer(majS, column.labels = char(congs), dep.var.caption ='', column.sep.width = '0pt', omit.table.layout = 'n', star.cutoffs = NA,
          dep.var.labels.include = FALSE, colnames=FALSE, digits=2, digits.extra=0,
          covariate.labels=c('DWNOM1', 'DWNOM2', 'Elec. Margin', 'PVI', 'Orig. Chamber', 'Cosponsors', 'Cosntant'))
stargazer(minS, column.labels = char(congs), dep.var.caption ='', column.sep.width = '0pt', omit.table.layout = 'n', star.cutoffs = NA,
          dep.var.labels.include = FALSE, colnames=FALSE, digits=2, digits.extra=0,
          covariate.labels=c('DWNOM1', 'DWNOM2', 'Elec. Margin', 'PVI', 'Orig. Chamber', 'Cosponsors', 'Cosntant'))


